You must read the data before trying to run code on your own machine. To read data use the following code after setting your working directory. To set your working directory, modify the following to set the file path for the folder where the data file resides. setwd(’c:/thatawesomeclass/)
Here is AirBnb data on rentals in New York city for a couple of months. Since the data contains locations of the rentals, this is spatial data. The features of each rental are related to a single location, therefore this is called point data.
data = read.csv('analysisData_with_lonlat.csv')What does the plot look like?
library(ggplot2)
ggplot(data) +
geom_point(aes(x=longitude,y=latitude),size=0.1)Google has recently changed its API requirements, and ggmap users are now required to provide an API key and enable billing. ggmap itself is outdated on CRAN. Until the new version is available on CRAN, you can install an updated version by running the following code
# if(!requireNamespace("devtools")) install.packages("devtools")
# devtools::install_github("dkahle/ggmap", ref = "tidyup")When you load ggmap, you can set your API key with register_google() (see ?register_google for details), but don’t forget to enable the Maps Static API in the Google Cloud interface and enable billing!
For you to use the google maps code below you will have to get your own api from Google. Once you have obtained an api key, you can assign it to the object api_key.
library(ggmap)
register_google(key = api_key)Get map of New York City by setting location as center of the city.
library(ggmap)
newyork_map = get_map(location=c(-73.93,40.77), zoom=10, scale=4)
ggmap(newyork_map)Add a data layer: Map of New York City + AirBnb Rentals
ggmap(newyork_map)+
geom_point(data=data, aes(x=longitude,y=latitude), size=0.1)Address overplotting by setting alpha=0.2, and change the color of rental location to seagreen
ggmap(newyork_map)+
geom_point(data=data, aes(x=longitude,y=latitude), size=0.1, alpha=0.2, color='seagreen')Represent price using Yellow-orange-red scale
library(RColorBrewer)
ggmap(newyork_map)+
geom_point(data=data, aes(x=longitude,y=latitude,color=price), size=0.1, alpha=0.2)+
scale_color_gradientn(colors=brewer.pal(n=7, name='YlOrRd'))manhattan = get_map(location=c(-73.93,40.77), zoom=12, scale=4, maptype='satellite')
ggmap(manhattan)Price is being converted into five percentile bins to make it easier to spot location differences in price.
library(Hmisc)
ggmap(manhattan)+
geom_point(data=data, aes(x=longitude,y=latitude,color=cut2(data$price,g=5)), size=0.3)+
scale_color_brewer(type ='seq', palette='RdYlGn', direction=-1)Sometimes, all we want is a simple old fashioned map.
manhattan = get_map(location=c(-73.93,40.77), zoom=12, scale=4, maptype='toner')
ggmap(manhattan)ggmap(manhattan)+
geom_point(data=data, aes(x=longitude,y=latitude,color=cut2(data$price,g=5)), size=0.3)+
scale_color_brewer(type='seq', palette='RdYlGn', direction =-1)manhattan = get_map(location=c(-73.93,40.77), zoom=12, scale=4, maptype='terrain', source='stamen')
ggmap(manhattan)ggmap(manhattan)+
geom_point(data=data,aes(x=longitude,y=latitude,color=cut2(data$price,g=5)), size=0.3)+
scale_color_brewer(type='seq', palette='RdYlGn', direction=-1)manhattan = get_map(location = c(-73.93,40.77),zoom = 12,scale = 4,maptype = 'roadmap')
ggmap(manhattan)ggmap(manhattan)+
geom_point(data=data,aes(x=longitude,y=latitude,color=cut2(data$price,g=5)), size=0.3)+
scale_color_brewer(type='seq', palette='RdYlGn', direction=-1)Let’s get familar with ggmaps and google maps by plotting Cold Stone Creamery (73.98W, 40.75N).
You will note ggmap uses a layered approach to plotting like ggplot2. However, unlike ggplot2 where the data is typically in the ggplot() layer, when using ggmap() the data pushed down to the plotting layers (e.g., geom_point, geom_label)
coldStoneCreamery_street = get_map(location=c(-73.988640,40.757020), zoom=18 , scale=2)## Source : https://maps.googleapis.com/maps/api/staticmap?center=40.75702,-73.98864&zoom=18&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx-x-ND2iyfra0D14HsZjfL_TKqll4
ggmap(coldStoneCreamery_street)+
geom_point(data=data.frame(lon=-73.988640,lat=40.757020),
mapping=aes(lon,lat),size=4,color='steelblue')+
geom_label(data=data.frame(lon=-73.988640,lat=40.757020),
mapping=aes(lon,lat), label='Cold Stone Creamery', nudge_y=0.0002, size=2)Above maps rely on Google API, which for now is free. An alternative is to use US Census Shapefiles from the TIGER/Line geodatabase. tigris is an R package that allows users to directly download and use TIGER/Line shapefiles. These files belong to the class sp.
library(tigris)
manhattan_tigris = tracts(state='NY', county='New York', cb=TRUE)library(tmap) offers a flexible, layer-based, and easy to use approach to create thematic maps, such as choropleths and bubble maps. It only works with sp, sf or raster objects, but that is okay because most spatial files are spatial objects.
library(tmap)
tm_shape(manhattan_tigris)+
tm_borders()Of course, if the data is not a spatial object, it will have to be converted to one before it can be plotted using library(tmap). Furthermore, to overlay sp class data on a map, both must have the same projection.
library(sp)
data_sp = data
coordinates(data_sp) =~longitude+latitudetm_shape(manhattan_tigris)+
tm_borders()+
tm_shape(data_sp)+
tm_dots(col='steelblue', size=0.01)library(rgdal) has a handy function to open shapefiles. In the code below, ‘nyc_shape’ is a folder on my computer that contains a set of nyc shape files that have been gathered from the open New York Neighborhood Tabulation Areas (NTA) database ( data pulled on Aug.5, 2019 )
library(sp)
library(rgdal)
nyc = readOGR(dsn='nyc_shape', layer='nynta')## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\Kitty\Desktop\AAFM2 2022S\11_SpatialAnalysis\nyc_shape", layer: "nynta"
## with 195 features
## It has 7 fields
Object Type
class(nyc) # object type## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
Coordinate system
summary(nyc) # coordinate system## Object of class SpatialPolygonsDataFrame
## Coordinates:
## min max
## x 913175.1 1067382.5
## y 120121.9 272844.3
## Is projected: TRUE
## proj4string :
## [+proj=lcc +lat_0=40.1666666666667 +lon_0=-74 +lat_1=41.0333333333333
## +lat_2=40.6666666666667 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft
## +no_defs]
## Data attributes:
## BoroCode BoroName CountyFIPS NTACode
## Min. :1 Length:195 Length:195 Length:195
## 1st Qu.:2 Class :character Class :character Class :character
## Median :3 Mode :character Mode :character Mode :character
## Mean :3
## 3rd Qu.:4
## Max. :5
## NTAName Shape_Leng Shape_Area
## Length:195 Min. : 11988 Min. : 5581768
## Class :character 1st Qu.: 23921 1st Qu.: 19392232
## Mode :character Median : 30549 Median : 32629789
## Mean : 42006 Mean : 43228063
## 3rd Qu.: 41877 3rd Qu.: 50223047
## Max. :490421 Max. :327765030
Slots
str(nyc,max.level = 2) # slots in sp object## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 195 obs. of 7 variables:
## ..@ polygons :List of 195
## ..@ plotOrder : int [1:195] 113 182 181 133 174 185 140 98 143 137 ...
## ..@ bbox : num [1:2, 1:2] 913175 120122 1067383 272844
## .. ..- attr(*, "dimnames")=List of 2
## ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
## ..$ comment: chr "TRUE"
Projection Type
proj4string(nyc)## [1] "+proj=lcc +lat_0=40.1666666666667 +lon_0=-74 +lat_1=41.0333333333333 +lat_2=40.6666666666667 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft +no_defs"
nyc@proj4string## CRS arguments:
## +proj=lcc +lat_0=40.1666666666667 +lon_0=-74 +lat_1=41.0333333333333
## +lat_2=40.6666666666667 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft
## +no_defs
Bounding Box
nyc@bbox # bounding box## min max
## x 913175.1 1067382.5
## y 120121.9 272844.3
Data Slot
head(nyc@data) # data slotSubsetting a SpatialPolygonsDataFrame
str(nyc[1,], max.level=2)## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 1 obs. of 7 variables:
## ..@ polygons :List of 1
## ..@ plotOrder : int 1
## ..@ bbox : num [1:2, 1:2] 982208 162478 991773 174108
## .. ..- attr(*, "dimnames")=List of 2
## ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
Extract rows. Subset the first census tract in the dataset and plot it
plot(nyc[1,]) But, geographical entities only make sense in context. So, let us overlay census tract 1 on a plot of nyc.
plot(nyc,col='steelblue')
plot(nyc[1,], add=T, col='tomato2')Next, extract census tract 2
plot(nyc,col='steelblue')
plot(nyc[1,], add=T, col='tomato2')
plot(nyc[2,], add=T, col='yellow')Finally, extract censust tracts 20:60 and plot.
plot(nyc,col='steelblue')
plot(nyc[1,], add=T, col='tomato2')
plot(nyc[2,], add=T, col='yellow')
plot(nyc[20:60,], add=T, col='green')Extract Columns
nyc@data$BoroName #or nyc$BoroName## [1] "Brooklyn" "Queens" "Queens" "Queens"
## [5] "Manhattan" "Queens" "Queens" "Brooklyn"
## [9] "Bronx" "Queens" "Manhattan" "Queens"
## [13] "Queens" "Queens" "Brooklyn" "Brooklyn"
## [17] "Brooklyn" "Bronx" "Brooklyn" "Queens"
## [21] "Queens" "Bronx" "Queens" "Queens"
## [25] "Brooklyn" "Brooklyn" "Brooklyn" "Brooklyn"
## [29] "Brooklyn" "Brooklyn" "Brooklyn" "Brooklyn"
## [33] "Brooklyn" "Brooklyn" "Bronx" "Brooklyn"
## [37] "Brooklyn" "Brooklyn" "Brooklyn" "Queens"
## [41] "Brooklyn" "Bronx" "Brooklyn" "Manhattan"
## [45] "Queens" "Queens" "Bronx" "Bronx"
## [49] "Bronx" "Bronx" "Brooklyn" "Brooklyn"
## [53] "Queens" "Bronx" "Queens" "Queens"
## [57] "Staten Island" "Manhattan" "Queens" "Queens"
## [61] "Queens" "Queens" "Queens" "Queens"
## [65] "Queens" "Bronx" "Queens" "Queens"
## [69] "Queens" "Bronx" "Brooklyn" "Queens"
## [73] "Brooklyn" "Brooklyn" "Queens" "Queens"
## [77] "Queens" "Queens" "Queens" "Brooklyn"
## [81] "Queens" "Bronx" "Bronx" "Staten Island"
## [85] "Staten Island" "Bronx" "Staten Island" "Brooklyn"
## [89] "Brooklyn" "Staten Island" "Bronx" "Queens"
## [93] "Queens" "Queens" "Manhattan" "Manhattan"
## [97] "Queens" "Staten Island" "Manhattan" "Queens"
## [101] "Manhattan" "Manhattan" "Manhattan" "Brooklyn"
## [105] "Queens" "Queens" "Staten Island" "Brooklyn"
## [109] "Bronx" "Manhattan" "Brooklyn" "Manhattan"
## [113] "Staten Island" "Staten Island" "Staten Island" "Queens"
## [117] "Bronx" "Bronx" "Brooklyn" "Brooklyn"
## [121] "Bronx" "Brooklyn" "Brooklyn" "Brooklyn"
## [125] "Brooklyn" "Brooklyn" "Queens" "Queens"
## [129] "Queens" "Queens" "Bronx" "Queens"
## [133] "Queens" "Brooklyn" "Brooklyn" "Brooklyn"
## [137] "Queens" "Manhattan" "Manhattan" "Staten Island"
## [141] "Staten Island" "Manhattan" "Brooklyn" "Brooklyn"
## [145] "Manhattan" "Queens" "Queens" "Staten Island"
## [149] "Manhattan" "Manhattan" "Brooklyn" "Staten Island"
## [153] "Bronx" "Bronx" "Manhattan" "Bronx"
## [157] "Bronx" "Bronx" "Manhattan" "Manhattan"
## [161] "Manhattan" "Manhattan" "Bronx" "Manhattan"
## [165] "Manhattan" "Manhattan" "Manhattan" "Bronx"
## [169] "Bronx" "Bronx" "Bronx" "Bronx"
## [173] "Bronx" "Bronx" "Bronx" "Bronx"
## [177] "Manhattan" "Manhattan" "Bronx" "Bronx"
## [181] "Brooklyn" "Queens" "Brooklyn" "Brooklyn"
## [185] "Staten Island" "Queens" "Queens" "Staten Island"
## [189] "Staten Island" "Queens" "Queens" "Staten Island"
## [193] "Staten Island" "Brooklyn" "Brooklyn"
table(nyc$BoroName)##
## Bronx Brooklyn Manhattan Queens Staten Island
## 38 51 29 58 19
Plot only Manhattan
plot(nyc[nyc$BoroName=='Manhattan',])Plot the five boroughs Let’s visualizing with library(tmap) Learn more about library(tmap) in this vignette
library(tmap)
tm_shape(nyc)+
tm_borders()tm_shape(nyc)+
tm_polygons('BoroName')tm_shape(nyc)+
tm_polygons('BoroName')+
tm_style('natural')+ # works like library(ggthemes)
tm_compass()nyc_map =
tm_shape(nyc)+
tm_polygons('BoroName')+
tm_style('natural') # works like library(ggthemes)
tmap_save(nyc_map, filename='nyc_map.jpg') # picture files
tmap_save(nyc_map, filename='nyc_map.html') # Saving as html creates an interactive map Let us plot population data on to a map of New York. Population Data is available from Open New York. In this instance, I have downloaded data on to my local machine before reading it into R. This is a good practice as repeatedly downloading relatively static data (population data is not updated daily or even weekly) by reading from the website directly places an unnecessary burden on the server.
Here, we read the data and create a field for change in population.
pop = read.csv('nyc_pop.csv')
library(dplyr)
library(tidyr)
nyc_pop =
pop%>%
spread(key=Year, value=Population)%>%
rename(year_2000='2000', year_2010='2010')%>%
mutate(pop_change=((year_2010-year_2000)/year_2000)*100)
head(nyc_pop)library(rgeos)
nyc_spdf = merge(x=nyc, y=nyc_pop, by.x='NTACode', by.y='NTA.Code')
tm_shape(nyc_spdf)+
tm_fill(col='pop_change', palette=brewer.pal(n=7,name='BrBG'),
breaks=c(-300,-200,-100,0,100,200,300))+
tm_style('cobalt')This file was generated using R Version 4.1.2